#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 19_10_14_data_preprocessing.Rmd) and clustering (pipeline in 19_10_21_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data_imputed.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters_imputed.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
  mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
  left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
  mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
  mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`), 
         significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '_imputed.csv'))
dataset$Module = dataset[,clustering_selected]


rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)


Relation to external clinical traits


Quantifying module-trait associations


Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.

datTraits = datMeta %>% dplyr::select(Diagnosis, Region, Sex, Age, PMI, RNAExtractionBatch) %>%
            dplyr::rename('ExtractionBatch' = RNAExtractionBatch)

# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)

# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)

# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
  print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated')) 
}
## [1] "1 correlation(s) could not be calculated"
rm(ME_object)

Note: The correlations between Modules and Diagonsis that cannot be calculated, weirdly enough, is because the initial correlation is too high, so it would be a very bad thing to lose these modules because of this numerical error. I’m going to fill in the values using the polyserial function, which doesn’t give exactly the same results as the hetcor() function, but it’s quite similar.

# Calculate the correlation tha failed with hetcor()
missing_modules = rownames(moduleTraitCor)[is.na(moduleTraitCor[,1])]

for(m in missing_modules){
  cat(paste0('Correcting Module-Diagnosis correlation for Module ', gsub('ME','',m)))
  moduleTraitCor[m,'Diagnosis'] = polyserial(MEs[,m], datTraits$Diagnosis)
}
## Correcting Module-Diagnosis correlation for Module #00C0B5
rm(missing_modules)

I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them

# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]

# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)


labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels =  gsub('ME','',rownames(moduleTraitCor)), 
               yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
               textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
               main = paste('Module-Trait relationships'))

diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
                           'MTcor' = moduleTraitCor[,'Diagnosis'],
                           'MTpval' = moduleTraitPvalue[,'Diagnosis'])

genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')

rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)


Studying the modules with the highest absolute correlation to Diagnosis


The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules

top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])

cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #00C0B5, #E46DF5
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
                   gene_id = paste0(ID, ' (', external_gene_id, ')'))

table(plot_data$ImportantModules)
## 
## #00C0B5 #E46DF5  Others 
##    1341    1011   13795
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
         geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() + 
           ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)




Module Membership vs Gene Significance


create_plot = function(module){
  
  plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
  colnames(plot_data)[2] = 'Module'
  
  SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
  
  p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
               scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,8))) + theme_minimal() + xlab('Module Membership') +
               ggtitle(paste0('Module ', module,'  (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
  
  return(p)
}

create_plot(top_modules[1])
create_plot(top_modules[2])
rm(create_plot)




SFARI Genes


List of top SFARI Genes in top modules ordered by SFARI score and Gene Significance

table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
             dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
             dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id, 
                    'SFARI score'=gene.score, 'Gene Significance'=GS)

kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[1]))
Top SFARI Genes for Module #00C0B5
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000110066 SUV420H1 0.6677451 1
ENSG00000141431 ASXL3 0.5523087 1
ENSG00000189056 RELN -0.0965191 1
ENSG00000108510 MED13 0.7613289 2
ENSG00000038382 TRIO 0.6348694 2
ENSG00000117139 KDM5B 0.6007663 2
ENSG00000181722 ZBTB20 0.8748406 3
ENSG00000168769 TET2 0.8167971 3
ENSG00000181090 EHMT1 0.8046105 3
ENSG00000162946 DISC1 0.6680125 3
ENSG00000083168 KAT6A 0.6183379 3
ENSG00000205581 HMGN1 0.5859493 3
ENSG00000166833 NAV2 0.4475125 3
ENSG00000149571 KIRREL3 0.4276545 3
ENSG00000197724 PHF2 0.4062299 3
ENSG00000103257 SLC7A5 0.3830543 3
ENSG00000145020 AMT 0.3789058 3
ENSG00000196839 ADA 0.3711328 3
ENSG00000101004 NINL 0.3483560 3
ENSG00000134698 AGO4 0.2741164 3
ENSG00000128573 FOXP2 0.2472601 3
ENSG00000118432 CNR1 0.2121820 3
ENSG00000165995 CACNB2 0.1659124 3
ENSG00000112902 SEMA5A 0.1492722 3
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
      caption=paste0('Top SFARI Genes for Module ', top_modules[2]))
Top SFARI Genes for Module #E46DF5
Ensembl ID Gene Symbol Gene Significance SFARI score
ENSG00000136535 TBR1 -0.6421845 1
ENSG00000136531 SCN2A -0.5865435 1
ENSG00000174469 CNTNAP2 -0.7276510 2
ENSG00000155974 GRIP1 -0.6495467 2
ENSG00000157445 CACNA2D3 -0.3055170 2
ENSG00000144619 CNTN4 -0.2873307 2
ENSG00000074590 NUAK1 -0.8334792 3
ENSG00000144285 SCN1A -0.8124748 3
ENSG00000196876 SCN8A -0.7956552 3
ENSG00000170579 DLGAP1 -0.7856950 3
ENSG00000132294 EFR3A -0.7769346 3
ENSG00000005955 GGNBP2 -0.7735918 3
ENSG00000078328 RBFOX1 -0.7616138 3
ENSG00000197535 MYO5A -0.7372328 3
ENSG00000157087 ATP2B2 -0.7189717 3
ENSG00000182621 PLCB1 -0.7144580 3
ENSG00000175497 DPP10 -0.7073016 3
ENSG00000021645 NRXN3 -0.6827653 3
ENSG00000003147 ICA1 -0.6806178 3
ENSG00000170745 KCNS3 -0.5681164 3
ENSG00000183454 GRIN2A -0.5680448 3
ENSG00000185345 PARK2 -0.4978854 3
ENSG00000050030 KIAA2022 -0.4407621 3
ENSG00000185008 ROBO2 -0.3077378 3
ENSG00000182256 GABRG3 -0.2590592 3
ENSG00000149970 CNKSR2 0.1124385 3
ENSG00000160305 DIP2A 0.0578090 3
ENSG00000171723 GPHN -0.0362838 3
ENSG00000166147 FBN1 0.0110186 3

Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case (even the opposite may be true)

plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>% 
            group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>% 
            mutate(p=round(p/n*100,2)) 

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(this_row$hasSFARIscore==FALSE & this_row$p==100){
    new_row = this_row
    new_row$hasSFARIscore = TRUE
    new_row$p = 0
    plot_data = plot_data %>% rbind(new_row)
  }
}

plot_data = plot_data %>% filter(hasSFARIscore==TRUE)

ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
         xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
         theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)

Breaking the SFARI genes by score

Perhaps there aren’t enough genes in each score for these plots to be reliable

scores = c(1,2,3,4,5,6,'None')

plot_data = dataset %>% group_by(Module, MTcor, gene.score) %>% summarise(n=n()) %>% 
            left_join(dataset %>% group_by(Module) %>% summarise(N=n()), by='Module') %>% 
            mutate(p=round(n/N*100,2), gene.score = as.character(gene.score))

for(i in 1:nrow(plot_data)){
  this_row = plot_data[i,]
  if(sum(plot_data$Module == this_row$Module)<7){
    missing_scores = which(! scores %in% plot_data$gene.score[plot_data$Module == this_row$Module])
    for(s in missing_scores){
      new_row = this_row
      new_row$gene.score = as.character(s)
      new_row$n = 0
      new_row$p = 0
      plot_data = plot_data %>% rbind(new_row) 
    }
  }
}

plot_data = plot_data %>% filter(gene.score != 'None')

plot_function = function(i){
  i = 2*i-1
  plot_list = list()
  for(j in 1:2){
    plot_list[[j]] = ggplotly(plot_data %>% filter(gene.score==scores[i+j-1]) %>% ggplot(aes(MTcor, p, size=n)) + 
                geom_smooth(color='gray', se=FALSE) +
                geom_point(color=plot_data$Module[plot_data$gene.score==scores[i+j-1]], alpha=0.5, aes(id=Module)) +
                geom_hline(yintercept=mean(plot_data$p[plot_data$gene.score==scores[i+j-1]]), color='gray') +
                xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
                theme_minimal() + theme(legend.position = 'none'))
  }
  p = subplot(plot_list, nrows=1) %>% layout(annotations = list(
    list(x = 0.2 , y = 1.05, text = paste0('SFARI score ', scores[i]), showarrow = F, xref='paper', yref='paper'),
    list(x = 0.8 , y = 1.05, text = paste0('SFARI score ', scores[i+1]), showarrow = F, xref='paper', yref='paper')))
  
  return(p)
}

plot_function(1)
plot_function(2)
plot_function(3)
rm(i, s, this_row, new_row, plot_function)




Module Eigengenes


Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.

In both cases, the Eigengenes separate the behaviour between autism and control samples very clearly!

plot_EGs = function(module){

  plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)

  p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
                    ggtitle(paste0('Module ', module, '  (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
  return(p)
}

p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])

grid.arrange(p1, p2, nrow=1)

rm(plot_EGs, p1, p2)




Identifying important genes


Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance

*Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules and not a single one belonging to scores 1 and 2

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]], caption=paste0('Top 10 genes for module ', top_modules[1], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
Top 10 genes for module #00C0B5 (MTcor = 1)
ID external_gene_id MM GS gene.score importance
ENSG00000143384 MCL1 0.8552824 0.9032620 None 0.8792722
ENSG00000003402 CFLAR 0.8911964 0.8305399 None 0.8608681
ENSG00000181722 ZBTB20 0.8304805 0.8748406 3 0.8526606
ENSG00000089159 PXN 0.7949400 0.8685202 None 0.8317301
ENSG00000158615 PPP1R15B 0.8294061 0.8322220 None 0.8308141
ENSG00000184384 MAML2 0.7956773 0.8559612 None 0.8258192
ENSG00000196935 SRGAP1 0.7227821 0.9258820 None 0.8243321
ENSG00000161638 ITGA5 0.7907543 0.8384614 None 0.8146079
ENSG00000196576 PLXNB2 0.7798962 0.8458053 None 0.8128508
ENSG00000120278 PLEKHG1 0.8021478 0.8151698 None 0.8086588
ENSG00000138119 MYOF 0.7678768 0.8442134 None 0.8060451
ENSG00000150457 LATS2 0.7581871 0.8437052 None 0.8009462
ENSG00000124782 RREB1 0.7566820 0.8375656 None 0.7971238
ENSG00000172493 AFF1 0.7590609 0.8347661 None 0.7969135
ENSG00000102531 FNDC3A 0.7941594 0.7992886 None 0.7967240
ENSG00000253731 PCDHGA6 0.7661406 0.8119576 None 0.7890491
ENSG00000133812 SBF2 0.7728464 0.8050416 None 0.7889440
ENSG00000072364 AFF4 0.7841051 0.7901701 6 0.7871376
ENSG00000148841 ITPRIP 0.7670328 0.8067253 None 0.7868791
ENSG00000168209 DDIT4 0.7439992 0.8279055 None 0.7859523
kable(top_genes[[2]], caption=paste0('Top 10 genes for module ', top_modules[2], '  (MTcor = ',
                                  round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
Top 10 genes for module #E46DF5 (MTcor = -0.91)
ID external_gene_id MM GS gene.score importance
ENSG00000050748 MAPK9 0.8990191 -0.9501362 None 0.9245776
ENSG00000177432 NAP1L5 0.8829939 -0.9182444 None 0.9006191
ENSG00000138078 PREPL 0.8757652 -0.9145463 None 0.8951558
ENSG00000155097 ATP6V1C1 0.8612938 -0.9176117 None 0.8894527
ENSG00000125814 NAPB 0.8940304 -0.8843363 None 0.8891833
ENSG00000163577 EIF5A2 0.8546741 -0.9233279 None 0.8890010
ENSG00000128881 TTBK2 0.9059408 -0.8626445 None 0.8842926
ENSG00000171132 PRKCE 0.8634904 -0.9045455 None 0.8840180
ENSG00000176490 DIRAS1 0.8594598 -0.9057699 None 0.8826149
ENSG00000114573 ATP6V1A 0.8088574 -0.9496685 None 0.8792630
ENSG00000131437 KIF3A 0.8643867 -0.8914761 None 0.8779314
ENSG00000196876 SCN8A 0.9494013 -0.7956552 3 0.8725282
ENSG00000111674 ENO2 0.8638465 -0.8786881 None 0.8712673
ENSG00000172348 RCAN2 0.8131262 -0.9275533 None 0.8703397
ENSG00000130540 SULT4A1 0.8737602 -0.8601999 None 0.8669801
ENSG00000184368 MAP7D2 0.7777380 -0.9482718 None 0.8630049
ENSG00000065609 SNAP91 0.8971523 -0.8179428 None 0.8575475
ENSG00000022355 GABRA1 0.9111891 -0.8010918 5 0.8561405
ENSG00000144285 SCN1A 0.8919455 -0.8124748 3 0.8522102
ENSG00000198825 INPP5F 0.8379959 -0.8652389 None 0.8516174
rm(create_table)
pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & 
                                  ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              theme_minimal() + ggtitle('Important genes identified through WGCNA')

Level of expression by Diagnosis for top genes, ordered by importance (defined above)

create_plot = function(i){
  
  plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>% 
              melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
              left_join(top_genes[[i]], by='ID') %>%
              left_join(datMeta %>% dplyr::select(Dissected_Sample_ID, Diagnosis),
                        by = c('variable'='Dissected_Sample_ID')) %>% arrange(desc(importance))
  
  p = ggplotly(plot_data %>% mutate(external_gene_id=factor(external_gene_id, 
                                    levels=unique(plot_data$external_gene_id), ordered=T)) %>%
               ggplot(aes(external_gene_id, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
                      xlab(paste0('Top genes for module ', top_modules[i], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[i]][1],2), ')')) + ylab('Level of Expression') +
                      theme(axis.text.x = element_text(angle = 90, hjust = 1)))
  return(p)
  
}

create_plot(1)
create_plot(2)
rm(create_plot)




Enrichment Analysis


Using the package anRichment

# Prepare dataset

# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID,
                        module = ifelse(genes_info$Module %in% top_modules, genes_info$Module, 'other')) %>%
             filter(genes_info$Module!='gray')

# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
## Batch submitting query [=>-----------------------------] 6% eta: 7s Batch
## submitting query [==>----------------------------] 9% eta: 7s Batch submitting
## query [===>---------------------------] 12% eta: 7s Batch submitting
## query [====>--------------------------] 15% eta: 6s Batch submitting
## query [=====>-------------------------] 18% eta: 6s Batch submitting
## query [======>------------------------] 21% eta: 6s Batch submitting
## query [=======>-----------------------] 24% eta: 6s Batch submitting
## query [=======>-----------------------] 27% eta: 5s Batch submitting
## query [========>----------------------] 30% eta: 5s Batch submitting
## query [=========>---------------------] 33% eta: 5s Batch submitting
## query [==========>--------------------] 36% eta: 5s Batch submitting
## query [===========>-------------------] 39% eta: 5s Batch submitting
## query [============>------------------] 42% eta: 5s Batch submitting
## query [=============>-----------------] 45% eta: 4s Batch submitting
## query [==============>----------------] 48% eta: 4s Batch submitting
## query [===============>---------------] 52% eta: 4s Batch submitting
## query [================>--------------] 55% eta: 4s Batch submitting
## query [=================>-------------] 58% eta: 3s Batch submitting
## query [==================>------------] 61% eta: 3s Batch submitting
## query [===================>-----------] 64% eta: 3s Batch submitting
## query [====================>----------] 67% eta: 3s Batch submitting
## query [=====================>---------] 70% eta: 2s Batch submitting
## query [======================>--------] 73% eta: 2s Batch submitting
## query [======================>--------] 76% eta: 2s Batch submitting
## query [=======================>-------] 79% eta: 2s Batch submitting
## query [========================>------] 82% eta: 1s Batch submitting
## query [=========================>-----] 85% eta: 1s Batch submitting
## query [==========================>----] 88% eta: 1s Batch submitting
## query [===========================>---] 91% eta: 1s Batch submitting
## query [============================>--] 94% eta: 0s Batch submitting
## query [=============================>-] 97% eta: 0s Batch submitting query
## [===============================] 100% eta: 0s
EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')

for(tm in top_modules){
  cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
             tm, ' don\'t have an Entrez Gene ID')) 
}
## 
## 34 genes from top module #00C0B5 don't have an Entrez Gene ID
## 14 genes from top module #E46DF5 don't have an Entrez Gene ID
rm(getinfo, mart, biomart_output, tm)
# Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf

collectGarbage()

# EA_dataset = rbind(EA_dataset[EA_dataset$module!='other',], EA_dataset[EA_dataset$module=='other',][sample(sum(EA_dataset$module=='other'), 1000),])

# Prepare datasets
GO_col = buildGOcollection(organism = 'human', verbose = 0)
## Loading required package: org.Hs.eg.db
## 
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
internal_col = internalCollection(organism = 'human')
MillerAIBS_col = MillerAIBSCollection(organism = 'human')
BrainDisease_col = BrainDiseaseCollection(organism = 'human')
combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)

# Print collections used
cat('Using collections: ')
## Using collections:
knownGroups(combined_col, sortBy = 'size')
##  [1] "GO"                                         
##  [2] "GO.BP"                                      
##  [3] "GO.MF"                                      
##  [4] "GO.CC"                                      
##  [5] "JA Miller at AIBS"                          
##  [6] "Chip-X enrichment analysis (ChEA)"          
##  [7] "Brain"                                      
##  [8] "JAM"                                        
##  [9] "Prenatal brain"                             
## [10] "Brain region markers"                       
## [11] "Cortex"                                     
## [12] "Brain region marker enriched gene sets"     
## [13] "WGCNA"                                      
## [14] "BrainRegionMarkers"                         
## [15] "BrainRegionMarkers.HBA"                     
## [16] "BrainRegionMarkers.HBA.localMarker(top200)" 
## [17] "Postnatal brain"                            
## [18] "ImmunePathways"                             
## [19] "Markers of cortex layers"                   
## [20] "BrainLists"                                 
## [21] "Cell type markers"                          
## [22] "Germinal brain"                             
## [23] "BrainRegionMarkers.HBA.globalMarker(top200)"
## [24] "Accelerated evolution"                      
## [25] "Postmitotic brain"                          
## [26] "BrainLists.Blalock_AD"                      
## [27] "BrainLists.DiseaseGenes"                    
## [28] "BloodAtlases"                               
## [29] "Verge Disease Genes"                        
## [30] "BloodAtlases.Whitney"                       
## [31] "BrainLists.JAXdiseaseGene"                  
## [32] "BrainLists.MO"                              
## [33] "Age-associated genes"                       
## [34] "BrainLists.Lu_Aging"                        
## [35] "Cell type marker enriched gene sets"        
## [36] "BrainLists.CA1vsCA3"                        
## [37] "BrainLists.MitochondrialType"               
## [38] "BrainLists.MO.2+_26Mar08"                   
## [39] "BrainLists.MO.Sugino"                       
## [40] "BloodAtlases.Gnatenko2"                     
## [41] "BloodAtlases.Kabanova"                      
## [42] "BrainLists.Voineagu"                        
## [43] "StemCellLists"                              
## [44] "StemCellLists.Lee"
# Perform Enrichment Analysis
enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
                                refCollection = combined_col, #useBackground = 'given', 
                                threshold = 1e-4, thresholdType = 'Bonferroni',
                                getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
##  enrichmentAnalysis: preparing data..
##   ..working on label set 1 ..


Results


kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, desc(enrichmentRatio)),
      caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
Enriched terms for module #00C0B5 (MTcor = 0.9999)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
GO:0006351 transcription, DNA-templated GO|GO.BP 4.50e-06 0e+00 1.370702 1299 2952 334
GO:0006355 regulation of transcription, DNA-templated GO|GO.BP 6.60e-06 1e-07 1.381922 1299 2779 317
GO:0097659 nucleic acid-templated transcription GO|GO.BP 8.00e-06 1e-07 1.361793 1299 2998 337
GO:1903506 regulation of nucleic acid-templated transcription GO|GO.BP 9.10e-06 1e-07 1.373568 1299 2840 322
GO:2001141 regulation of RNA biosynthetic process GO|GO.BP 1.22e-05 1e-07 1.370191 1299 2847 322
GO:0032774 RNA biosynthetic process GO|GO.BP 1.37e-05 1e-07 1.355913 1299 3011 337
GO:2000112 regulation of cellular macromolecule biosynthetic process GO|GO.BP 4.12e-05 3e-07 1.335813 1299 3147 347
GO:0051252 regulation of RNA metabolic process GO|GO.BP 6.45e-05 5e-07 1.339434 1299 3039 336
GO:0010556 regulation of macromolecule biosynthetic process GO|GO.BP 6.78e-05 5e-07 1.323019 1299 3269 357
GO:0098609 cell-cell adhesion GO|GO.BP 7.90e-05 6e-07 1.837215 1299 666 101
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>% 
      dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
                    effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
      arrange(Bonferroni, enrichmentRatio),
      caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
                       round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
Enriched terms for module #E46DF5 (MTcor = -0.9092)
dataSetID shortDataSetName inGroups Bonferroni FDR enrichmentRatio effectiveClassSize effectiveSetSize nCommonGenes
JAMiller.AIBS.000142 Highest in CP of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 2.897111 986 1212 220
JAMiller.AIBS.000052 CortexWGCNA 15-21 post-conception weeks C26 JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA 0.00e+00 0e+00 3.236118 986 725 147
JAMiller.AIBS.000569 WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.00e+00 0e+00 1.707653 986 4047 433
JAMiller.AIBS.000150 Highest in CP of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 2.526654 986 1276 202
JAMiller.AIBS.000155 Lowest in VZ of E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 2.214607 986 1672 232
JAM:002744 Autism_differential_expression_across_at_least_one_comparison JAM|BrainLists|BrainLists.Voineagu 0.00e+00 0e+00 2.882908 986 764 138
JAMiller.AIBS.000506 Genes bound by SUZ12 in MOUSE MESC from PMID 20075857 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.715973 986 3367 362
JAM:003016 downAD_synapticTransmission JAM|BrainLists|BrainLists.Blalock_AD 0.00e+00 0e+00 7.711227 986 89 43
JAMiller.AIBS.000570 WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA 0.00e+00 0e+00 3.311296 986 482 100
GO:0045202 synapse GO|GO.CC 0.00e+00 0e+00 2.415470 986 1044 158
GO:0097458 neuron part GO|GO.CC 0.00e+00 0e+00 2.161076 986 1418 192
GO:0044456 synapse part GO|GO.CC 0.00e+00 0e+00 2.585553 986 821 133
JAMiller.AIBS.000141 CP enriched in E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain 0.00e+00 0e+00 2.912081 986 570 104
JAMiller.AIBS.000504 Genes bound by SUZ12 in mouse MESC from PMID 18692474 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 2.044713 986 1366 175
JAMiller.AIBS.000123 HippocampusWGCNA turquoise DGenriched upAge JA Miller at AIBS|Brain|Postnatal brain|WGCNA 0.00e+00 0e+00 2.174448 986 1101 150
JAMiller.AIBS.000505 Genes bound by SUZ12 in MOUSE MESC from PMID 18974828 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 2.010855 986 1389 175
GO:0007268 chemical synaptic transmission GO|GO.BP 0.00e+00 0e+00 2.766866 986 548 95
GO:0098916 anterograde trans-synaptic signaling GO|GO.BP 0.00e+00 0e+00 2.766866 986 548 95
GO:0099536 synaptic signaling GO|GO.BP 0.00e+00 0e+00 2.745883 986 558 96
GO:0099537 trans-synaptic signaling GO|GO.BP 0.00e+00 0e+00 2.741849 986 553 95
JAMiller.AIBS.000503 Genes bound by SUZ12 in mouse MESC from PMID 18555785 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 2.399283 986 765 115
GO:1990351 transporter complex GO|GO.CC 0.00e+00 0e+00 3.732685 986 248 58
GO:0043005 neuron projection GO|GO.CC 0.00e+00 0e+00 2.156817 986 1036 140
GO:1902495 transmembrane transporter complex GO|GO.CC 0.00e+00 0e+00 3.743808 986 243 57
GO:0034702 ion channel complex GO|GO.CC 0.00e+00 0e+00 3.776356 986 224 53
JAMiller.AIBS.000364 Genes bound by MTF2 in MOUSE MESC from PMID 20144788 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.675637 986 2286 240
JAMiller.AIBS.000463 Genes bound by SMAD4 in HUMAN A2780 from PMID 21799915 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.696132 986 2089 222
GO:0097060 synaptic membrane GO|GO.CC 0.00e+00 0e+00 2.901899 986 374 68
GO:0030424 axon GO|GO.CC 0.00e+00 0e+00 2.586476 986 506 82
JAM:002764 downAging_mitochondria_synapse JAM|BrainLists|BrainLists.Lu_Aging 0.00e+00 0e+00 2.823771 986 390 69
JAM:002805 Cerebral Cortex JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 4.039372 986 162 41
JAM:003072 Tail of Caudate Nucleus_IN_Striatum JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 4.039372 986 162 41
GO:0098794 postsynapse GO|GO.CC 0.00e+00 0e+00 2.464481 986 544 84
GO:0098793 presynapse GO|GO.CC 0.00e+00 0e+00 2.685652 986 416 70
GO:0034220 ion transmembrane transport GO|GO.BP 0.00e+00 0e+00 2.066308 986 896 116
GO:0034703 cation channel complex GO|GO.CC 0.00e+00 0e+00 3.872061 986 169 41
GO:0071944 cell periphery GO|GO.CC 0.00e+00 0e+00 1.412039 986 3990 353
JAMiller.AIBS.000584 noncodingHumanAcceleratedRegions fromSeveralSources JA Miller at AIBS|Accelerated evolution 0.00e+00 0e+00 2.070643 986 871 113
JAMiller.AIBS.000436 Genes bound by REST in MOUSE MESC from PMID 21632747 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 0.00e+00 0e+00 1.715152 986 1675 180
JAM:002991 downAD_synapticTransmission JAM|BrainLists|BrainLists.Blalock_AD 0.00e+00 0e+00 4.936221 986 97 30
JAM:002751 Basal Pons JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.822861 986 167 40
JAMiller.AIBS.000149 Lowest in VZ of 13-16 post-conception weeks human JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 0.00e+00 0e+00 2.280064 986 616 88
GO:0006811 ion transport GO|GO.BP 0.00e+00 0e+00 1.812581 986 1312 149
GO:0007610 behavior GO|GO.BP 0.00e+00 0e+00 2.414449 986 509 77
GO:0042995 cell projection GO|GO.CC 0.00e+00 0e+00 1.665905 986 1782 186
GO:0045211 postsynaptic membrane GO|GO.CC 0.00e+00 0e+00 2.989059 986 283 53
GO:0006812 cation transport GO|GO.BP 0.00e+00 0e+00 2.008506 986 890 112
GO:0120025 plasma membrane bounded cell projection GO|GO.CC 0.00e+00 0e+00 1.676634 986 1723 181
GO:0007267 cell-cell signaling GO|GO.BP 0.00e+00 0e+00 1.792092 986 1327 149
JAM:002882 Hippocampal Formation JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets 0.00e+00 0e+00 3.683180 986 169 39
GO:0030001 metal ion transport GO|GO.BP 1.00e-07 0e+00 2.163734 986 686 93
GO:0043269 regulation of ion transport GO|GO.BP 1.00e-07 0e+00 2.308565 986 560 81
JAMiller.AIBS.000490 Genes bound by STAT3 in HUMAN U87 from PMID 23295773 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 1.00e-07 0e+00 1.523326 986 2546 243
GO:0005886 plasma membrane GO|GO.CC 1.00e-07 0e+00 1.390880 986 3913 341
JAM:002824 Dentate Nucleus_IN_Cerebellar Nucleus JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.00e-07 0e+00 3.675739 986 165 38
JAM:002769 downAD_mitochondrion JAM|BrainLists|BrainLists.Blalock_AD 1.00e-07 0e+00 3.011405 986 265 50
GO:0046873 metal ion transmembrane transporter activity GO|GO.MF 1.00e-07 0e+00 2.896930 986 292 53
JAMiller.AIBS.000106 Genes enriched in the hippocampal SGZ in mouse JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers 1.00e-07 0e+00 2.738775 986 338 58
JAMiller.AIBS.000095 Cortical PNOC neurons JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex 1.00e-07 0e+00 1.383643 986 3945 342
GO:0005216 ion channel activity GO|GO.MF 3.00e-07 0e+00 2.907081 986 280 51
GO:0015318 inorganic molecular entity transmembrane transporter activity GO|GO.MF 3.00e-07 0e+00 2.319300 986 523 76
GO:0050877 nervous system process GO|GO.BP 3.00e-07 0e+00 2.002276 986 829 104
JAMiller.AIBS.000518 Genes bound by TCF4 in HUMAN U87 from PMID 23295773 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 3.00e-07 0e+00 1.452871 986 3021 275
JAM:003054 subiculum_IN_Hippocampal Formation JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 5.00e-07 0e+00 3.579009 986 165 37
GO:0022838 substrate-specific channel activity GO|GO.MF 6.00e-07 0e+00 2.846094 986 286 51
JAM:003085 trochlear nucleus_IN_Mesencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 7.00e-07 0e+00 3.699441 986 151 35
GO:0042391 regulation of membrane potential GO|GO.BP 7.00e-07 0e+00 2.684039 986 333 56
GO:0098978 glutamatergic synapse GO|GO.CC 8.00e-07 0e+00 2.644609 986 344 57
JAM:002907 inferior olivary complex_IN_Myelencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.00e-06 0e+00 3.494299 986 169 37
GO:0007399 nervous system development GO|GO.BP 1.00e-06 0e+00 1.573789 986 2008 198
GO:0022890 inorganic cation transmembrane transporter activity GO|GO.MF 1.30e-06 0e+00 2.540124 986 377 60
GO:0008324 cation transmembrane transporter activity GO|GO.MF 1.70e-06 0e+00 2.458455 986 409 63
JAMiller.AIBS.000005 CPi markers at 21 post-conception weeks JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Postmitotic brain 1.90e-06 0e+00 2.702568 986 313 53
GO:0050804 modulation of chemical synaptic transmission GO|GO.BP 2.00e-06 0e+00 2.538184 986 371 59
GO:0099177 regulation of trans-synaptic signaling GO|GO.BP 2.20e-06 0e+00 2.531361 986 372 59
GO:0034765 regulation of ion transmembrane transport GO|GO.BP 2.30e-06 0e+00 2.461561 986 402 62
JAM:002920 Lateral Nucleus_IN_Amygdala JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 3.10e-06 0e+00 3.440575 986 167 36
JAM:003058 Substantia Nigra, pars compacta_IN_Mesencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 3.10e-06 0e+00 3.440575 986 167 36
GO:0055085 transmembrane transport GO|GO.BP 3.10e-06 0e+00 1.743685 986 1254 137
GO:0023061 signal release GO|GO.BP 3.50e-06 0e+00 2.504432 986 376 59
JAMiller.AIBS.000218 Genes bound by AR in HUMAN PROSTATE CANCER from PMID 22383394 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 3.90e-06 0e+00 1.669277 986 1482 155
GO:0022803 passive transmembrane transporter activity GO|GO.MF 4.20e-06 0e+00 2.677236 986 310 52
GO:0003008 system process GO|GO.BP 4.30e-06 0e+00 1.680633 986 1434 151
GO:0005261 cation channel activity GO|GO.MF 5.50e-06 1e-07 3.033207 986 221 42
JAMiller.AIBS.000140 SZ/IZ/CP enriched in E14.5 mouse JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex 7.10e-06 1e-07 3.956008 986 117 29
GO:0051049 regulation of transport GO|GO.BP 7.20e-06 1e-07 1.642531 986 1545 159
GO:0015075 ion transmembrane transporter activity GO|GO.MF 7.50e-06 1e-07 2.156060 986 570 77
JAMiller.AIBS.000071 GerminalZonesWGCNA 15-21 post-conception weeks G3 IntermediateProgenitors JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA|Germinal brain 1.02e-05 1e-07 2.586861 986 327 53
GO:0036477 somatodendritic compartment GO|GO.CC 1.03e-05 1e-07 2.020891 986 695 88
GO:0015267 channel activity GO|GO.MF 1.17e-05 1e-07 2.634248 986 309 51
GO:0015077 monovalent inorganic cation transmembrane transporter activity GO|GO.MF 1.51e-05 1e-07 2.988029 986 219 41
JAM:002739 arcuate nucleus of medulla_IN_Myelencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.56e-05 1e-07 3.325093 986 168 35
JAM:002875 downAD_ionAndCalciumTransport JAM|BrainLists|BrainLists.Blalock_AD 1.59e-05 1e-07 4.506479 986 85 24
JAM:003105 ventral tegmental area_IN_Mesencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 1.97e-05 2e-07 3.522305 986 145 32
JAM:002918 lateral medullary reticular group_IN_Myelencephalon JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets 2.30e-05 2e-07 3.349723 986 162 34
JAMiller.AIBS.000471 Genes bound by SOX2 in HUMAN LN229 GBM from PMID 21211035 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 2.80e-05 2e-07 1.452078 986 2572 234
GO:0023052 signaling GO|GO.BP 2.98e-05 2e-07 1.286249 986 4951 399
JAMiller.AIBS.000217 Genes bound by AR in HUMAN PC3 from PMID 19668381 JA Miller at AIBS|Chip-X enrichment analysis (ChEA) 2.99e-05 2e-07 1.439057 986 2684 242
GO:0034762 regulation of transmembrane transport GO|GO.BP 3.05e-05 2e-07 2.246033 986 469 66
GO:0098655 cation transmembrane transport GO|GO.BP 3.23e-05 3e-07 2.045564 986 632 81
GO:0022839 ion gated channel activity GO|GO.MF 3.55e-05 3e-07 2.864695 986 234 42
GO:0098660 inorganic ion transmembrane transport GO|GO.BP 3.80e-05 3e-07 2.049495 986 623 80
GO:0022836 gated channel activity GO|GO.MF 4.07e-05 3e-07 2.852505 986 235 42
GO:0098590 plasma membrane region GO|GO.CC 4.08e-05 3e-07 1.785565 986 1019 114
GO:0033267 axon part GO|GO.CC 4.37e-05 3e-07 2.487952 986 340 53
JAM:002763 downAD_metalIonTransport_glycoprotein JAM|BrainLists|BrainLists.Blalock_AD 4.40e-05 3e-07 2.660074 986 282 47
JAM:002997 Polycomb_genes_(PGSTs)_repressed_by_SCs JAM|StemCellLists|StemCellLists.Lee 4.91e-05 4e-07 1.749090 986 1095 120
GO:0044463 cell projection part GO|GO.CC 5.43e-05 4e-07 1.700703 986 1220 130
GO:0120038 plasma membrane bounded cell projection part GO|GO.CC 5.43e-05 4e-07 1.700703 986 1220 130
GO:0044459 plasma membrane part GO|GO.CC 6.04e-05 4e-07 1.521192 986 1983 189

Save Enrichment Analysis results

save(enrichment, file='./../Data/enrichmentAnalysis_imputed.RData')
#load('./../Data/enrichmentAnalysis.RData')



Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] org.Hs.eg.db_3.8.2                      
##  [2] BrainDiseaseCollection_1.00             
##  [3] anRichment_1.01-2                       
##  [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.4.7
##  [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 
##  [6] GenomicFeatures_1.36.4                  
##  [7] GenomicRanges_1.36.1                    
##  [8] GenomeInfoDb_1.20.0                     
##  [9] anRichmentMethods_0.90-1                
## [10] WGCNA_1.69                              
## [11] fastcluster_1.1.25                      
## [12] dynamicTreeCut_1.63-1                   
## [13] GO.db_3.8.2                             
## [14] AnnotationDbi_1.46.1                    
## [15] IRanges_2.18.3                          
## [16] S4Vectors_0.22.1                        
## [17] Biobase_2.44.0                          
## [18] BiocGenerics_0.30.0                     
## [19] biomaRt_2.40.5                          
## [20] knitr_1.28                              
## [21] doParallel_1.0.15                       
## [22] iterators_1.0.12                        
## [23] foreach_1.5.0                           
## [24] polycor_0.7-10                          
## [25] expss_0.10.2                            
## [26] GGally_1.5.0                            
## [27] gridExtra_2.3                           
## [28] viridis_0.5.1                           
## [29] viridisLite_0.3.0                       
## [30] RColorBrewer_1.1-2                      
## [31] dendextend_1.13.4                       
## [32] plotly_4.9.2                            
## [33] glue_1.3.2                              
## [34] reshape2_1.4.3                          
## [35] forcats_0.5.0                           
## [36] stringr_1.4.0                           
## [37] dplyr_0.8.5                             
## [38] purrr_0.3.3                             
## [39] readr_1.3.1                             
## [40] tidyr_1.0.2                             
## [41] tibble_3.0.0                            
## [42] ggplot2_3.3.0                           
## [43] tidyverse_1.3.0                         
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.4-0                 plyr_1.8.6                 
##   [5] lazyeval_0.2.2              splines_3.6.3              
##   [7] crosstalk_1.1.0.1           BiocParallel_1.18.1        
##   [9] digest_0.6.25               htmltools_0.4.0            
##  [11] fansi_0.4.1                 magrittr_1.5               
##  [13] checkmate_2.0.0             memoise_1.1.0              
##  [15] cluster_2.1.0               annotate_1.62.0            
##  [17] Biostrings_2.52.0           modelr_0.1.6               
##  [19] matrixStats_0.56.0          prettyunits_1.1.1          
##  [21] jpeg_0.1-8.1                colorspace_1.4-1           
##  [23] blob_1.2.1                  rvest_0.3.5                
##  [25] haven_2.2.0                 xfun_0.12                  
##  [27] crayon_1.3.4                RCurl_1.98-1.1             
##  [29] jsonlite_1.6.1              genefilter_1.66.0          
##  [31] impute_1.58.0               survival_3.1-11            
##  [33] gtable_0.3.0                zlibbioc_1.30.0            
##  [35] XVector_0.24.0              DelayedArray_0.10.0        
##  [37] scales_1.1.0                mvtnorm_1.1-0              
##  [39] DBI_1.1.0                   Rcpp_1.0.4                 
##  [41] xtable_1.8-4                progress_1.2.2             
##  [43] htmlTable_1.13.3            foreign_0.8-75             
##  [45] bit_1.1-15.2                preprocessCore_1.46.0      
##  [47] Formula_1.2-3               htmlwidgets_1.5.1          
##  [49] httr_1.4.1                  acepack_1.4.1              
##  [51] ellipsis_0.3.0              farver_2.0.3               
##  [53] pkgconfig_2.0.3             reshape_0.8.8              
##  [55] XML_3.99-0.3                nnet_7.3-13                
##  [57] dbplyr_1.4.2                locfit_1.5-9.4             
##  [59] labeling_0.3                tidyselect_1.0.0           
##  [61] rlang_0.4.5                 munsell_0.5.0              
##  [63] cellranger_1.1.0            tools_3.6.3                
##  [65] cli_2.0.2                   generics_0.0.2             
##  [67] RSQLite_2.2.0               broom_0.5.5                
##  [69] evaluate_0.14               yaml_2.2.1                 
##  [71] bit64_0.9-7                 fs_1.4.0                   
##  [73] nlme_3.1-144                xml2_1.2.5                 
##  [75] compiler_3.6.3              rstudioapi_0.11            
##  [77] curl_4.3                    png_0.1-7                  
##  [79] reprex_0.3.0                geneplotter_1.62.0         
##  [81] stringi_1.4.6               highr_0.8                  
##  [83] lattice_0.20-40             Matrix_1.2-18              
##  [85] vctrs_0.2.4                 pillar_1.4.3               
##  [87] lifecycle_0.2.0             data.table_1.12.8          
##  [89] bitops_1.0-6                rtracklayer_1.44.4         
##  [91] R6_2.4.1                    latticeExtra_0.6-29        
##  [93] codetools_0.2-16            assertthat_0.2.1           
##  [95] SummarizedExperiment_1.14.1 DESeq2_1.24.0              
##  [97] withr_2.1.2                 GenomicAlignments_1.20.1   
##  [99] Rsamtools_2.0.3             GenomeInfoDbData_1.2.1     
## [101] mgcv_1.8-31                 hms_0.5.3                  
## [103] grid_3.6.3                  rpart_4.1-15               
## [105] rmarkdown_2.1               lubridate_1.7.4            
## [107] base64enc_0.1-3